library(ggplot2)
library(dplyr)
library(reshape2)
library(fst)
library(haven)
library(lm.beta)
library(miceadds)


load("main_data_local.rdata")

#create function for splitting income residuals in brackets
stacker <- 
  function(x){
    stack <- NA
    stack[x >=-3.25] <- -3
    stack[x > -2.75] <- -2.5
    stack[x > -2.25] <- -2
    stack[x > -1.75] <- -1.5
    stack[x > -1.25] <- -1
    stack[x > - .75] <- -.5
    stack[x > - .25] <- 0
    stack[x >   .25] <- .5
    stack[x >   .75] <- 1
    stack[x >  1.25] <- 1.5
    stack[x >  1.75] <- 2
    stack[x >  2.25] <- 2.5
    stack[x >  2.75] <- 3
    return(stack)
  }

# create bars per year for local elections  

# create empty data frame
bar_data <- data_frame()

inc_1985 <- 
  read.fst("ind1985.fst")

inc_1986 <- 
  read.fst("ind1986.fst")

inc_1987 <- 
  read.fst("ind1987.fst")

bef_father <-
  read.fst("bef1986.fst") %>% 
  select("PNR", "KOEN", "ALDER")

# Average over income 1985-1987

inc <- 
  left_join(inc_1985 %>% select(c("PNR", "SAMLINK_NY")), 
            inc_1986 %>% select(c("PNR", "SAMLINK_NY")), 
            by = "PNR") %>% 
  left_join(., inc_1987 %>% select(c("PNR", "SAMLINK_NY")),
            by = "PNR") %>% 
  mutate(SAMLINK_NY = rowMeans(cbind(SAMLINK_NY.x, SAMLINK_NY.y, SAMLINK_NY), na.rm = TRUE)) %>% 
  select(c("PNR", SAMLINK_NY))

inc <- 
  left_join(inc, bef_father, by = "PNR")

inc <-
  inc %>%
  filter(KOEN == 1 & ALDER > 17) 

data_out <- data_frame()
 
var <- c("run_kv", "VALGT_JN")
lab <- c("Running for \nmunicipality", 
         "Elected for \nmunicipality")

for(k in seq(1993, 2013, by = 4)){
  bef <-
    read.fst(paste("bef", k, ".fst", sep = "")) 
  
  koms <- unique(bef$KOM)
  
  for(j in 1:2){
    
  bef_j <- 
    bef %>% 
    select("PNR", "FAR_ID", "KOM")
  
  data_year <- 
    data_comp_local %>% 
    filter(four_years == k) %>% 
    left_join(., bef_j, by = "PNR") %>% 
    left_join(., inc, by = c("FAR_ID" = "PNR")) %>%
    filter(inc_res < quantile(inc_res, p = 0.999) &
             inc_res > quantile(inc_res, p = 0.001) ) %>%
    filter(first_elected == 0 | first_elected >= k) %>% 
    mutate(inc_res2 = (inc_res - mean(inc_res)) / sd(inc_res)) %>%
    filter(abs(inc_res2) <= 3.25) %>% 
    mutate(inc_res_bars = stacker(inc_res2))
  
    for(kom in koms){
      data_year_i <- 
        data_year %>%
        filter(KOM == kom) %>%
        group_by(KOEN, ALDER) %>% 
        mutate(inc_tile = ntile(SAMLINK_NY, 20)) %>% 
        ungroup()
      
      data_year_i_pol <- 
        data_year_i %>% 
        filter(.[var[j]] == 1)
      
      par_inc_m <- rep(NA, 20)
      par_inc_c <- rep(NA, 20)
      
      inc_res_m <- rep(NA, 13)
      inc_res_c <- rep(NA, 13)
      
      if(nrow(data_year_i_pol) > 3) {
        for(i in 1:20){
          par_inc_m[i] <- mean(data_year_i$inc_tile == i, na.rm = TRUE)
          par_inc_c[i] <- mean(data_year_i_pol$inc_tile == i, na.rm = TRUE)
        }
        for(i in 1:13){
          res <- seq(-3, 3, 0.5)[i]
          inc_res_m[i] <- mean(data_year_i$inc_res_bars == res, na.rm = TRUE)
          inc_res_c[i] <- mean(data_year_i_pol$inc_res_bars == res, na.rm = TRUE)
        }
        
        data_year_i <- 
          data_frame(kom   = kom, 
                     year  = k, 
                     type  = var[j],
                     label = lab[j], 
                     rep   = sum(par_inc_c * 1:20) - sum(par_inc_m * 1:20),
                     comp  = sum(inc_res_c * seq(-3, 3, 0.5)) - sum(inc_res_m * seq(-3, 3, 0.5)))
      
      data_out <- 
        bind_rows(data_out, data_year_i)
      }
    }
  }
  print(k)
  print(Sys.time())
}


kom_cont <- 
  c("101", "147", "151", "153", "155", "157", "159", "161", 
    "163", "165", "167", "169", "173", "175", "183", "185", 
    "187", "201", "217", "223", "400", "253", "269", "329", 
    "461", "563", "607", "727", "741", "751", "773", "825")

data_out <- 
  data_out %>% 
  mutate(merge = ! kom %in% kom_cont)

ests_elected <- matrix(NA, nrow = 4, ncol = 4)
ests_running <- matrix(NA, nrow = 4, ncol = 4)

# find interaction models each yearly diff-in-diff

for(i in 1:4){
  k <- seq(2001, 2013, 4)[i]
  
  data_mod_elected <- 
    data_out %>% 
    filter((year == k - 8 | year == k) & type == "VALGT_JN") %>% 
    mutate(post = year == k)
  
  mod_elected <- lm(formula = comp ~ rep * post * merge, 
                            data = data_mod_elected)
  
  ests_elected[1:2, i] <- summary(mod_elected)$coefficients[8, 1:2]
  ests_elected[  4, i] <- lm.beta(mod_elected)$standardized.coefficients[8]
  
  
  # repeat for running
  
  data_mod_running <- 
    data_out %>% 
    filter((year == k - 8 | year == k) & type == "run_kv") %>% 
    mutate(post = year == k)
  
  mod_running <- lm(formula = comp ~ rep * post * merge, 
                    data = data_mod_running)
  
  ests_running[1:2, i] <- summary(mod_running)$coefficients[8, 1:2]
  ests_running[  4, i] <- lm.beta(mod_running)$standardized.coefficients[8]
  
}

ests_running[3, ] <- paste("[",  round(ests_running[1, ] + qnorm(0.025) * ests_running[2, ], 3),
                           "; ", round(ests_running[1, ] + qnorm(0.975) * ests_running[2, ], 3), 
                           "]", sep = "")
ests_elected[3, ] <- paste("[",  round(ests_elected[1, ] + qnorm(0.025) * ests_elected[2, ], 3),
                           "; ", round(ests_elected[1, ] + qnorm(0.975) * ests_elected[2, ], 3), 
                           "]", sep = "")

ests_elected[c(1:2, 4),] <- round(as.numeric(ests_elected[c(1:2, 4),]), 3)
ests_running[c(1:2, 4),] <- round(as.numeric(ests_running[c(1:2, 4),]), 3)


## create plot of yearly slopes in each group: 
data_plot <- data_frame()
for(i in 1:6){
  k <- seq(1993, 2013, 4)[i]
  
  alpha <- c("cont", "merg")
  
  for(j in 0:1){
    data_mod <- 
      data_out %>% 
      filter(year == k & merge == j) 
    
    mod_elected <- lm(comp ~ rep, data = data_mod %>%  filter(type == "elected_kv"))
    mod_running <- lm(comp ~ rep, data = data_mod %>%  filter(type != "elected_kv"))
    
    data_plot <- 
      bind_rows(data_plot, 
                data_frame(alpha = alpha[j + 1],
                           year  = k + (j - .5) * 0.4,
                           est   = c(coef(mod_elected)[2], coef(mod_running)[2]),
                           se    = c(summary(mod_elected)$coefficients[2, 2],
                                     summary(mod_running)$coefficients[2, 2]),
                           type  = rev(lab)))
  }
  
  
}


data_plot <- 
  data_plot %>% 
  mutate(type = as.factor(type),
         type = factor(type, levels = levels(type)[c(2,1)]))

plot <- 
  ggplot(data = data_plot,
         aes(x = year, 
             y = est,
             ymax = est + se * qnorm(0.975),
             ymin = est + se * qnorm(0.025),
             alpha = alpha)) + 
  geom_point() + 
  geom_errorbar(width = 0) + 
  facet_grid(type  ~ .) + 
  theme_classic() +
  scale_y_continuous("Competence-representation trade-off") +
  scale_x_continuous("Year", breaks = seq(1993, 2013, 4)) +
  geom_vline(xintercept = 2003, linetype = "dashed", alpha = 0.5) + 
  scale_alpha_discrete("", range = c(0.25, 1), 
                       labels = c("Continuing \nmunicipalities",
                                  "Amalgamated \nmunicipalities"))  +
  theme(panel.spacing = unit(2, "lines")) + 
  geom_hline(yintercept = 0, alpha = 0.3, linetype = "dotted")
  